import numpy as np #linear algebra
import pandas as pd # data processing,CSV file I/O(e.g pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from warnings import filterwarnings
filterwarnings("ignore")
# Load seaborn's example "tips" dataset into a DataFrame and display it.
data = sns.load_dataset(name="tips")
data
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
# Preview the first five rows.
data.head(n=5)
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Dimensions of the DataFrame as a (rows, columns) tuple.
data.shape
(244, 7)
# Count rows that are exact repeats of an earlier row.
data.duplicated(keep="first").sum()
1
# Remove repeated rows in place (the first occurrence is kept), then
# confirm that no duplicates remain.
data.drop_duplicates(keep="first", inplace=True)
data.duplicated(keep="first").sum()
0
# Print a summary of the frame: index range, columns, non-null counts,
# dtypes and memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 243 entries, 0 to 243 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 total_bill 243 non-null float64 1 tip 243 non-null float64 2 sex 243 non-null category 3 smoker 243 non-null category 4 day 243 non-null category 5 time 243 non-null category 6 size 243 non-null int64 dtypes: category(4), float64(2), int64(1) memory usage: 9.1 KB
# Column labels of the DataFrame.
data.columns
Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')
# VISUALIZATION

# Bar chart of total_bill against smoker status, with vertical tick labels.
plt.bar(x=data['smoker'], height=data['total_bill'])
plt.xticks(rotation=90)
plt.show()

# Interactive horizontal bars: tip on the x axis, day on the y axis,
# coloured by tip.
fig = px.bar(data_frame=data, x='tip', y='day', color='tip')
fig.show()

# Interactive violin plot of sex against time, coloured by time.
fig = px.violin(data_frame=data, x='time', y='sex', color='time')
fig.show()

# Number of rows for each value of size, on a wide figure.
plt.figure(figsize=(10, 4))
sns.countplot(data=data, x='size', color='b')
plt.show()

# total_bill as a function of size.
sns.lineplot(data=data, x='size', y='total_bill')
<AxesSubplot:xlabel='size', ylabel='total_bill'>
# Bar plot of tip for each value of size. x and y are passed by keyword
# together with data=: seaborn 0.12+ no longer accepts them positionally,
# so sns.barplot(data['size'], data['tip'], ...) raises TypeError there.
sns.barplot(x='size', y='tip', data=data, color='r')
plt.xticks(rotation=90)
plt.show()

# Scatter of total_bill against size, one point per row.
sns.scatterplot(data=data, x='size', y='total_bill')
plt.xlabel('size')
plt.ylabel('total_bill')
plt.show()

# Distribution of the "sex" column.
sns.displot(data["sex"])
<seaborn.axisgrid.FacetGrid at 0x1fe497a1f40>
# Number of rows per value of time, with vertical tick labels.
sns.countplot(data=data, x='time')
plt.xticks(rotation=90)
(array([0, 1]), [Text(0, 0, 'Lunch'), Text(1, 0, 'Dinner')])
# Box plot of tip for each day.
sns.boxplot(data=data, x='day', y='tip')
<AxesSubplot:xlabel='day', ylabel='tip'>
# Violin plot of size for each value of sex.
sns.violinplot(data=data, x='sex', y='size')
<AxesSubplot:xlabel='sex', ylabel='size'>
# MODEL BUILDING

# Feature matrix: the three numeric columns.
X = data.loc[:, ['total_bill', 'tip', 'size']]
X.head(n=5)
| | total_bill | tip | size |
|---|---|---|---|
| 0 | 16.99 | 1.01 | 2 |
| 1 | 10.34 | 1.66 | 3 |
| 2 | 21.01 | 3.50 | 3 |
| 3 | 23.68 | 3.31 | 2 |
| 4 | 24.59 | 3.61 | 4 |
# Target vector: the categorical "sex" column.
y = data.loc[:, 'sex']
y.head(n=5)
0 Female 1 Male 2 Male 3 Male 4 Female Name: sex, dtype: category Categories (2, object): ['Male', 'Female']
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode the categorical target as integer class labels.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 30% of the rows for evaluation. random_state pins the shuffle so
# the split, and therefore the reported accuracy, is the same on every run;
# without it only the classifier was seeded and the score changed each time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit a seeded AdaBoost ensemble of 50 estimators on the training split.
abc = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=0)
model = abc.fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
print("AdaBoost Classifier Model Accuracy:", accuracy_score(y_test, y_pred))
AdaBoost Classifier Model Accuracy: 0.684931506849315